#!/bin/sh
#  
#  $Id$
#
#  This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
#  project.
#  
#  Copyright (C) 1998-2024 OpenLink Software
#  
#  This project is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by the
#  Free Software Foundation; only version 2 of the License, dated June 1991.
#  
#  This program is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#  General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#  

# ----------------------------------------------------------------------
#  Force the C/POSIX locale for every tool started from this script
#  (the sed and grep patterns below use ranges such as [0-9A-F])
# ----------------------------------------------------------------------
LANG=C;       export LANG
LC_ALL=POSIX; export LC_ALL


# ----------------------------------------------------------------------
#  tabtoheader SRC DEST
#
#  Reads SRC, a table with one record per line and fifteen fields
#  separated by ';' (presumably the UnicodeData.txt layout -- confirm
#  against the data file), and writes DEST: a licence comment, a line
#  'UNICODE3_HEADER', one 'UNICODE3_REC(...)' line per input record and
#  a line 'UNICODE3_FOOTER'.
#
#  Arguments of each generated UNICODE3_REC, by input field number (fN):
#    0xf1, "f3", f4, "f5",
#    f7 (or -1), f8 (or -1), f9 (or -1) with ".0" appended,
#    0xf15, 0xf14, 0xf13 (each 0x0000 when the field is empty),
#    1 / 0 for f10 = Y / N,
#    f6 rewritten as NULL / UNICODE3_Sn(...) / UNICODE3_SX(...),
#    "f12", "f11", "f2"
#
#  Progress messages go to stdout.  Scratch files are kept in ./tmp,
#  which is created if it does not exist yet.
#
#  Returns 0 on success, 1 when an argument is missing, SRC is not
#  readable, or one of the processing steps fails.
# ----------------------------------------------------------------------
tabtoheader()
{
  if [ -z "$1" ] || [ -z "$2" ]; then
    echo "usage: tabtoheader SRC DEST" >&2
    return 1
  fi
  # Refuse to go on without input: otherwise DEST would be overwritten
  # with a header that has no records in it.
  if [ ! -r "$1" ]; then
    echo "tabtoheader: cannot read input table '$1'" >&2
    return 1
  fi
  [ -d tmp ] || mkdir tmp || return 1

  # printf rather than 'echo -n': the latter is not portable under /bin/sh.
  printf '%s' "Translation from Unicode3 table to $2 ...	"

  # Stage 1: wrap each line in UNICODE3_REC(...), then spread the fields
  # out behind one-letter markers (",e ", ",n ", ",o ", ...) that stage 2
  # uses to tell the fields apart.
  sed \
    -e 's/^\(.*\)$/UNICODE3_REC(\1),/g' \
    -e 's/^UNICODE3_REC(\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^)]*\)),$/UNICODE3_REC( 0x\1	, "\3"	, \4	, "\5"	,e \7,f \8, UNICODE3_F6(\6) , "\2")/g' \
    -e 's/,f \([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^,]*\), UNICODE3_F6(\([^)]*\)) ,/,n \1,o \2,h \8,i \7,j \6,m \3,UNICODE3_F6(\9),k \5,l \4,/g' \
    < "$1" > tmp/6.c || return 1
  printf '%s' " 20% done...	"

  # Stage 2: replace every marker by its final C spelling, filling in a
  # default value first where the field is empty.  The order matters:
  # each "fill default" expression must run before the expression that
  # consumes the same marker.
  sed \
    -e 's/,e ,/,e -1,/g' \
    -e 's/,e \([^,]*\),/, \1	,/g' \
    -e 's/,i ,/,i 0000,/g' \
    -e 's/,i \([^,]*\),/, 0x\1	,/g' \
    -e 's/,j ,/,j 0000,/g' \
    -e 's/,j \([^,]*\),/, 0x\1	,/g' \
    -e 's/,h ,/,h 0000,/g' \
    -e 's/,h \([^,]*\),/, 0x\1	,/g' \
    -e 's/,m Y,/, 1	,/g' \
    -e 's/,m N,/, 0	,/g' \
    -e 's/,n ,/,n -1,/g' \
    -e 's/,n \([^,]*\),/, \1	,/g' \
    -e 's/,o ,/,o -1,/g' \
    -e 's/,o \([^,]*\),/, \1.0   	,/g' \
    -e 's/,k \([^,]*\),/, "\1"	,/g' \
    -e 's/,l \([^,]*\),/, "\1"	,/g' \
    < tmp/6.c > tmp/M.c || return 1
  printf '%s' " 70% done...	"

  # Stage 3: rewrite the UNICODE3_F6(...) placeholder built from f6:
  #   empty                          -> NULL
  #   <tag> followed by 1..6 codes   -> UNICODE3_S1 .. UNICODE3_S6
  #   <tag> followed by more codes   -> UNICODE3_SX + UNICODE3_long_ligature
  #   one or two codes without a tag -> UNICODE3_S1 / UNICODE3_S2 with
  #                                     UNICODE3_exact
  sed \
    -e 's/,UNICODE3_F6(),/, NULL,/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\)),/, UNICODE3_S1(UNICODE3_\2, 0x\4),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S2(UNICODE3_\2, 0x\4, 0x\5),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S3(UNICODE3_\2, 0x\4, 0x\5, 0x\6),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S4(UNICODE3_\2, 0x\4, 0x\5, 0x\6, 0x\7),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S5(UNICODE3_\2, 0x\4, 0x\5, 0x\6, 0x\7, 0x\8),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S6(UNICODE3_\2, 0x\4, 0x\5, 0x\6, 0x\7, 0x\8, 0x\9),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F ]*\)),/, UNICODE3_SX(UNICODE3_\2, UNICODE3_long_ligature("\4")),/g' \
    -e 's/,UNICODE3_F6(\([ ]*\)\([0-9A-F]*\)\([ ]*\)),/, UNICODE3_S1(UNICODE3_exact, 0x\2),/g' \
    -e 's/,UNICODE3_F6(\([ ]*\)\([0-9A-F]*\)\([ ]*\)\([0-9A-F]*\)\([ ]*\)),/, UNICODE3_S2(UNICODE3_exact, 0x\2, 0x\4),/g' \
    < tmp/M.c > tmp/W.c || return 1

  # Assemble DEST.  The delimiter is quoted so that the licence text is
  # copied literally, without any shell expansion.
  cat > "$2" <<'EOF' || return 1
/*
 *  This file is generated by unicodetab2h.sh
 *  
 *  This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
 *  project.
 *  
 *  Copyright (C) 1998-2024 OpenLink Software
 *  
 *  This project is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; only version 2 of the License, dated June 1991.
 *  
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *  
 *  
 */
EOF
  echo 'UNICODE3_HEADER' >> "$2"
  cat tmp/W.c >> "$2" || return 1
  echo 'UNICODE3_FOOTER' >> "$2"
  echo " 100% done."
}


# ----------------------------------------------------------------------
#  Driver: select subsets of unicode3.dat with grep, turn each subset
#  (and the complete table) into a header with tabtoheader, then derive
#  unicode3_all_chars.sql from unicode3_all_chars.h.
#
#  Must be run from the directory that contains unicode3.dat; all output
#  files are written to the current directory.
# ----------------------------------------------------------------------

# Stop before touching anything when the input table is missing;
# otherwise every generated header would be replaced by an empty one.
if [ ! -r unicode3.dat ]; then
  echo "$0: unicode3.dat not found or not readable in the current directory" >&2
  exit 1
fi

rm -rf tmp
mkdir tmp || exit 1
# Remove the scratch directory on every exit path, including interrupts.
trap 'rm -rf tmp' 0
trap 'exit 1' 1 2 15

# Lines whose last field consists of exactly four hex digits.
printf '%s' "Search for lowecase characters...	"
grep ';[0-9A-F][0-9A-F][0-9A-F][0-9A-F]$' unicode3.dat > tmp/tmp1.c
echo " 100% done."

# Lines whose last field is empty and whose field before it consists of
# four hex digits.
printf '%s' "Search for uppercase characters...	"
grep ';[0-9A-F][0-9A-F][0-9A-F][0-9A-F];$' unicode3.dat > tmp/tmp2.c
echo " 100% done."

# Lines that contain the field sequence ";0;WS;".
printf '%s' "Search for 'logical space' characters...	"
grep ';0;WS;' unicode3.dat > tmp/tmp3.c
echo " 100% done."

# Lines whose sixth field is one four-digit hex code, optionally followed
# by a space and a second one.
printf '%s' "Search for modified/compatible characters...	"
grep -E '^[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[0-9A-F][0-9A-F][0-9A-F][0-9A-F](( [0-9A-F][0-9A-F][0-9A-F][0-9A-F])?);' unicode3.dat > tmp/tmp4.c
echo " 100% done."

tabtoheader tmp/tmp1.c unicode3_lowers.h || exit 1
tabtoheader tmp/tmp2.c unicode3_uppers.h || exit 1
tabtoheader tmp/tmp3.c unicode3_spaces.h || exit 1
tabtoheader tmp/tmp4.c unicode3_basechars.h || exit 1
tabtoheader unicode3.dat unicode3_all_chars.h || exit 1

# SQL flavour of the full table: HEADER/FOOTER become calls, every record
# gets a trailing ';', the first argument of UNICODE3_Sn/SX is wrapped in
# UNICODE3_G("..."), double quotes become single quotes and four-digit
# 0x literals become 0hex literals.  The expressions run in this order on
# every line, exactly as the former chain of sed processes did.
sed \
  -e 's/UNICODE3_HEADER/UNICODE3_HEADER();/g' \
  -e 's/UNICODE3_FOOTER/UNICODE3_FOOTER();/g' \
  -e 's/^\(UNICODE3_REC.*\)$/\1;/g' \
  -e 's/UNICODE3_S\([1-9X]\)(UNICODE3_\([a-zA-Z0-9]*\)/UNICODE3_S\1(UNICODE3_G("\2")/g' \
  -e "s/\"/'/g" \
  -e 's/0x\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\)/0hex\1/g' \
  unicode3_all_chars.h > unicode3_all_chars.sql || exit 1

rm -rf tmp
